---
title: "Mixed MLM Analysis"
author: "Matt S"
date: "2/24/2015"
output: html_document
---

Require Packages

```{r}

install.packages(c("psych","nlme", "reshape", "ICC", "plyr", "data.table"))

require("psych"); require("nlme"); require("plyr"); require("data.table")

```

Load Data

```{r, echo=TRUE}

#Set Working Directory (insert appropriate directory below)
setwd

#Load data
mccrae <- read.csv("1_Big Five By Country.csv")
ics2001 <- read.csv("3_International College Survey 2001 Final.csv")
countries <- read.csv("5_Countries and Regions.csv")
controls <- read.csv("6_Country_Controls.csv")
gdp <- read.csv("7_2005 World Bank GDP.csv")
country_happiness <- read.csv("8_Country Happiness.csv")

```

Adjust for reverse coding, aggregate variables and compute age in ics2001.

```{r}

#Reverse coding
ics2001$ext2 <- 6 - ics2001$ext2
ics2001$ext3 <- 6 - ics2001$ext3
ics2001$ext6 <- 6 - ics2001$ext6

#Average Extroversion
ics2001$ext_tot <- (ics2001$ext1 + ics2001$ext2 + ics2001$ext3 + ics2001$ext4 + ics2001$ext5 + ics2001$ext6)/6

#Average Subjective Well-Being
ics2001$swl_tot <- (ics2001$swl1 + ics2001$swl2 + ics2001$swl3 + ics2001$swl4 + ics2001$swl5)/5

#Recode age into numeric variable
ics2001$age_num[ics2001$age=="under 18" & !is.na(ics2001$age)]  <- 0
ics2001$age_num[ics2001$age=="18-19" & !is.na(ics2001$age)]  <- 1
ics2001$age_num[ics2001$age=="20-21" & !is.na(ics2001$age)]  <- 2
ics2001$age_num[ics2001$age=="22-23" & !is.na(ics2001$age)]  <- 3
ics2001$age_num[ics2001$age=="24-25" & !is.na(ics2001$age)]  <- 4
ics2001$age_num[ics2001$age=="26-27" & !is.na(ics2001$age)]  <- 5
ics2001$age_num[ics2001$age=="28 and over" & !is.na(ics2001$age)]  <- 6

#Describe country happiness
describe(country_happiness$swl_wdh)

#Center country happiness
country_happiness$swl_wdh <- country_happiness$swl_wdh - mean(country_happiness$swl_wdh)

```

Merge Data

```{r}

#Rename variable in controls data
controls <- rename(controls, c("country"="code"))
country_happiness <- rename(country_happiness, c("country"="code"))

#Merge
country_data1 <- merge(controls, countries, by="code")
country_data2 <- merge(country_data1, gdp,  by="code")
    #Log transform GDP
    country_data2$log_gdp <- log(100+country_data2$gdp) 
country_data3 <- merge(country_data2, country_happiness, by="code")
mixed_data <- merge(country_data3, ics2001, by="country")
final_data <- merge(mixed_data, mccrae, by="country")


```

Country-Level Descriptives for Controls

```{r}

names(country_data3)

describe(country_data3$individualism)
describe(country_data3$power_distance)
describe(country_data3$swl_wdh)

```

Clean Dataset

```{r}

final_data <- subset(final_data, acq == "0")
final_data <- subset(final_data,  !country == "Egypt")
final_data <- subset(final_data, !country == "Zimbabwe")
final_data <- (final_data[,c("code", "country", "UN.region", "age_num", "sex", "swl_tot", "ext_tot", "extrov", "traditional_rational_values", "power_distance", "individualism", "log_gdp", "swl_wdh")])

```

Descriptives of final data set. 

```{r}

#Center age
final_data$age_num <- as.numeric(final_data$age_num - mean(final_data$age_num))

#Descriptives
describe(final_data$age_num)
describe(final_data$sex)

```

Create group means and SD's for EXT and SWL. This is to facilitate the removal of outliers. Group means will be calculated again subsequently, for inclusion in the final model. 

```{r}

#Means
country.means <- data.table(final_data)
country.means <- data.frame(country.means[,list(country.ext_tot=mean(ext_tot,na.rm=T),country.swl_tot=mean(swl_tot,na.rm=T)), by=country])

#Rename Variables
country.means <- rename(country.means, c("country.ext_tot"="EXT_M", "country.swl_tot"="SWL_M"))

#SD's
country.sds <- data.table(final_data)
country.sds <- data.frame(country.sds[,list(country.ext_tot=sd(ext_tot,na.rm=T),country.swl_tot=sd(swl_tot,na.rm=T)),by=country])

#Rename Variables
country.sds <- rename(country.sds, c("country.ext_tot"="EXT_SD", "country.swl_tot"="SWL_SD"))

#Merge Means and SD's
country.means_sds <- merge(country.means,country.sds,by="country")
final_data <- merge(final_data,country.means_sds,by="country")

      
```

Compute z-scores and delete outliers for individual Extorversion and SWL. 

```{r}

#Compute Z-Scores
final_data$z_ext <- (final_data$ext_tot-final_data$EXT_M)/final_data$EXT_SD
final_data$z_swl <- (final_data$swl_tot-final_data$SWL_M)/final_data$SWL_SD

#Delete outliers 
final_data$ext_tot[final_data$z_ext>3 & !is.na(final_data$ext_tot)] <- NA
final_data$ext_tot[final_data$z_ext<(-3) & !is.na(final_data$ext_tot)] <- NA
final_data$swl_tot[final_data$z_swl>3 & !is.na(final_data$swl_tot)] <- NA
final_data$swl_tot[final_data$z_swl<(-3) & !is.na(final_data$swl_tot)] <- NA

```

Create group means for Extroversion, this time without outliers. Then, group mean center extroversion. 

```{r}

#Delete now irrelevant variables from data set
final_data <- final_data[,c("code", "country", "UN.region", "age_num", "sex", "swl_tot", "ext_tot", "extrov", "traditional_rational_values", "power_distance", "individualism", "log_gdp", "swl_wdh")]

#Create new group means for extroversion to dataset
ext_means <- data.table(final_data)
ext_means <- data.frame(ext_means[,list(country.ext_tot=mean(ext_tot,na.rm=T)), by=country])

#Rename variable
ext_means <- rename(ext_means, c("country.ext_tot"="EXT_M"))

#Merge dataset
final_data <- merge(final_data, ext_means, by="country")

#Group mean center extroversion
final_data$ext_c <- final_data$ext_tot - final_data$EXT_M

final_data <- na.omit(final_data)

```

Mixed MLM Analysis.  

```{r}

#Model with only individual-country extroversion as interaction. 
model1.1 <- lme(swl_tot ~ age_num + sex + log_gdp + individualism + power_distance + swl_wdh + ext_c*extrov, data=final_data, random = ~ 1 + ext_c|country, correlation=corCompSymm(), method="REML", na.action=na.omit)
summary(model1.1)

#Backward reduction, beginning with full interaction model, alpha = .01

#F-Test for interaction model
model2.1 <- lme(swl_tot ~ age_num + sex + log_gdp + ext_c*(individualism + power_distance + extrov + swl_wdh), data=final_data, random = ~ 1 + ext_c|country, correlation=corCompSymm(), method="ML", na.action=na.omit)
model2.2 <- lme(swl_tot ~ age_num + sex + ext_c + log_gdp + extrov + swl_wdh + individualism + power_distance, data=final_data, random = ~ 1 + ext_c|country, correlation=corCompSymm(), method="ML", na.action=na.omit)
anova(model3.1, model3.2)

#Backward reduction of main effect model.
model3.1 <- lme(swl_tot ~ age_num + sex + ext_c + log_gdp + extrov + swl_wdh + individualism + power_distance, data=final_data, random = ~ 1 + ext_c|country, correlation=corCompSymm(), method="REML", na.action=na.omit)
summary(model3.1)

model3.2 <- lme(swl_tot ~ age_num + sex + ext_c + extrov + swl_wdh + power_distance, data=final_data, random = ~ 1 + ext_c|country, correlation=corCompSymm(), method="REML", na.action=na.omit)
summary(model3.2)

model3.3 <- lme(swl_tot ~ age_num + sex + ext_c + extrov + swl_wdh, data=final_data, random = ~ 1 + ext_c|country, correlation=corCompSymm(), method="REML", na.action=na.omit)
summary(model3.3)

#Final Model
model3.4 <- lme(swl_tot ~ age_num + sex + ext_c + swl_wdh, data=final_data, random = ~ 1 + ext_c|country, correlation=corCompSymm(), method="REML", na.action=na.omit)
summary(model3.4)

#Confidence intervals: ext_c
0.300889 - 1.96*0.01739242
0.300889 + 1.96*0.01739242

#Confidence intervals: swl_wdh
0.450740 - 1.96*0.10496598
0.450740 + 1.96*0.10496598

#CI for age
-0.048459 - 1.96*0.01111209
-0.048459 + 1.96*0.01111209

#CI for sex
-0.144362 - 1.96*0.02868785
-0.144362 + 1.96*0.02868785

#CI for intercept 
4.246691 - 1.96*0.11657816
4.246691 + 1.96*0.11657816

#Final Model
model3.4 <- lme(swl_tot ~ age_num + sex + ext_c*extrov, data=final_data, random = ~ 1 + ext_c|country, correlation=corCompSymm(), method="REML", na.action=na.omit)
summary(model3.4)

```
